# Base libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
# NetworkX
import networkx as nx
import osmnx as ox
# OS environment setup
from local_directories import *
# Seed value kept for reproducibility.
# NOTE(review): nothing in the visible code passes this value to a random
# number generator - confirm where (or whether) it is applied.
random_seed = 2674
# Other analysis parameters
# NOTE(review): not referenced in the visible code - presumably a minimum
# ego-graph size used elsewhere; confirm.
neighbourhood_min_nodes = 8
# Radius passed to the ego-graph extraction further down, where distances are
# measured along the edge "length" attribute.
max_distance = 500
# Load Leicester's graph from the stored GraphML file and create a projected copy
leicester_graphml_path = (
    bulk_storage_directory + "/osmnx/raw_excluded/leicester-1864.graphml"
)
leicester_osmnx_graph = ox.io.load_graphml(leicester_graphml_path)
leicester_osmnx_graph_prj = ox.project_graph(leicester_osmnx_graph)
# Number of nodes in the (unprojected) graph
leicester_osmnx_graph.number_of_nodes()
13293
# Draw the projected network: black nodes and thin black edges on white
ox.plot_graph(
    leicester_osmnx_graph_prj,
    figsize=(16, 16),
    bgcolor="#ffffff",
    node_color="#000000",
    node_size=5,
    edge_color="#000000",
    edge_linewidth=0.1,
)
(<Figure size 1600x1600 with 1 Axes>, <Axes: >)
# Convert graph to dataframe version: one row per node, one column per node
# attribute, plus the node id in "osmnx_node_id".
#
# The rows are collected in a list and the DataFrame is built once. Growing
# the frame with pd.concat inside the loop copies the whole table on every
# iteration and leaves every row with the same index label (0).
node_records = []
for node, node_attrs in leicester_osmnx_graph_prj.nodes(data=True):
    # The id is also written into the graph's own attribute dictionary, as the
    # original loop did. It is stored as int so it can be matched against the
    # "osmnx_node_id" column of the embeddings table.
    node_attrs["osmnx_node_id"] = int(node)
    node_records.append(node_attrs)
leicester_osmnx_graph_prj_df = pd.DataFrame(node_records)
leicester_osmnx_graph_prj_df.head()
| y | x | street_count | elevation | elevation_aster | elevation_srtm | lon | lat | osmnx_node_id | ref | highway | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5.829804e+06 | 622151.977595 | 3 | 72.0 | 35 | 72 | -1.196195 | 52.604506 | 194739 | NaN | NaN |
| 0 | 5.829991e+06 | 622098.041002 | 3 | 72.0 | 45 | 72 | -1.196922 | 52.606196 | 1551014281 | NaN | NaN |
| 0 | 5.828827e+06 | 622259.813792 | 2 | 79.0 | 57 | 79 | -1.194965 | 52.595696 | 326312 | 21 | motorway_junction |
| 0 | 5.830107e+06 | 622077.742140 | 3 | 79.0 | 43 | 79 | -1.197179 | 52.607245 | 326320 | 21 | motorway_junction |
| 0 | 5.829673e+06 | 622220.645785 | 3 | 74.0 | 35 | 74 | -1.195230 | 52.603314 | 2627867454 | NaN | NaN |
# Load Leicester's node embeddings (one row per node id)
leicester_emb_csv_path = (
    this_repo_directory + "/data/leicester-1864_emb_gnnuf_model_v0-5.csv"
)
leicester_emb_df = pd.read_csv(leicester_emb_csv_path)
leicester_emb_df.head()
| osmnx_node_id | EMB000 | EMB001 | |
|---|---|---|---|
| 0 | 337976 | -0.212304 | -0.563564 |
| 1 | 337979 | -0.322662 | -0.882213 |
| 2 | 337983 | -0.009132 | 0.948856 |
| 3 | 337985 | -0.136350 | 0.965531 |
| 4 | 337986 | -0.203456 | 0.447374 |
# Interactive scatter plot of the two embedding dimensions; the node id is
# shown on hover.
embedding_axis_style = dict(
    showgrid=True, gridwidth=1, gridcolor='#cccccc',
    zeroline=True, zerolinewidth=1, zerolinecolor='#cccccc',
)
fig = px.scatter(
    leicester_emb_df,
    x="EMB000", y="EMB001",
    hover_data=['osmnx_node_id'],
    width=800, height=800,
)
fig.update_layout({"plot_bgcolor": "#ffffff"})
# Both axes share the same light-grey grid and zero line
fig.update_xaxes(**embedding_axis_style)
fig.update_yaxes(**embedding_axis_style)
fig.show()
# Attach the two embedding dimensions to every node of the projected graph.
# Nodes that have no row in the embeddings table get None for both attributes.
#
# The embeddings are indexed once in a dictionary so that each node costs a
# single lookup instead of several boolean scans of the whole table. Values
# are converted from scalars, which avoids calling float() on a one-element
# array (deprecated in recent NumPy releases). If a node id appears more than
# once in the table, the last row is used.
embeddings_by_node = {
    node_id: (float(emb000), float(emb001))
    for node_id, emb000, emb001 in leicester_emb_df[
        ["osmnx_node_id", "EMB000", "EMB001"]
    ].itertuples(index=False, name=None)
}
for node in leicester_osmnx_graph_prj.nodes:
    node_emb000, node_emb001 = embeddings_by_node.get(node, (None, None))
    leicester_osmnx_graph_prj.nodes[node]["EMB000"] = node_emb000
    leicester_osmnx_graph_prj.nodes[node]["EMB001"] = node_emb001
# Map of the network with each node coloured by its first embedding dimension
emb000_per_node = [
    node_attrs["EMB000"]
    for _, node_attrs in leicester_osmnx_graph_prj.nodes(data=True)
]
ox.plot_graph(
    leicester_osmnx_graph_prj,
    node_color=emb000_per_node,
    node_size=10,
    bgcolor="#ffffff",
    figsize=(16, 16),
)
(<Figure size 1600x1600 with 1 Axes>, <Axes: >)
# Map of the network with each node coloured by its second embedding dimension
emb001_per_node = [
    node_attrs["EMB001"]
    for _, node_attrs in leicester_osmnx_graph_prj.nodes(data=True)
]
ox.plot_graph(
    leicester_osmnx_graph_prj,
    node_color=emb001_per_node,
    node_size=10,
    bgcolor="#ffffff",
    figsize=(16, 16),
)
(<Figure size 1600x1600 with 1 Axes>, <Axes: >)
Check the correlations between the node embeddings, the ego-graph pooled embeddings, and the OSMnx statistics: the centrality of each node within the city-wide network, the centrality of each node within the ego-graph used to create its embedding, and the basic stats of that ego-graph.
# Build one table per node that joins the node embeddings with the pooled
# embeddings and the precomputed statistics. Every join is on "osmnx_node_id"
# and uses the default merge type, so only nodes present in all tables remain.

# Ego-graph pooled embeddings, renamed so they do not clash with the node embeddings
leicester_emb_pooled_df = pd.read_csv(
    this_repo_directory + "/data/leicester-1864_emb-pool_gnnuf_model_v0-5.csv"
).rename(columns={"EMB000":"EMB000pooled", "EMB001":"EMB001pooled"})

# Centrality including node-based and ego-graph-based
leicester_centrality_df = pd.read_csv(
    this_repo_directory + "/data/leicester-1864_stats_node_centrality_with_egograph_dist500.csv"
).rename(columns={"node_id":"osmnx_node_id"})

# Ego-graph basic stats: rows without a node id are dropped and only the
# columns used in the correlation analysis are kept
egograph_stats_columns = [
    "osmnx_node_id","n", "m", "k_avg", "edge_length_total", "edge_length_avg",
    "streets_per_node_avg", "intersection_count", "street_length_total",
    "street_segment_count", "street_length_avg", "circuity_avg",
]
leicester_egograph_stats_df = pd.read_csv(
    this_repo_directory + "/data/leicester-1864_stats_egograph_basic_dist500.csv"
).rename(columns={"node_id":"osmnx_node_id"})
leicester_egograph_stats_df = leicester_egograph_stats_df.dropna(
    subset=["osmnx_node_id"]
)[egograph_stats_columns]

leicester_emb_stats_for_corr = leicester_emb_df[["osmnx_node_id", "EMB000", "EMB001"]]
for stats_table in (
    leicester_emb_pooled_df,
    leicester_centrality_df,
    leicester_egograph_stats_df,
):
    leicester_emb_stats_for_corr = leicester_emb_stats_for_corr.merge(
        stats_table, on="osmnx_node_id"
    )
leicester_emb_stats_for_corr.head()
| osmnx_node_id | EMB000 | EMB001 | EMB000pooled | EMB001pooled | closeness_networkwide | betweenness_networkwide | closeness_egograph | betweenness_egograph | n | m | k_avg | edge_length_total | edge_length_avg | streets_per_node_avg | intersection_count | street_length_total | street_segment_count | street_length_avg | circuity_avg | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 337976 | -0.212304 | -0.563564 | -0.214178 | 0.073930 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 11.0 | 11.0 | 2.0 | 1261.861 | 114.714636 | 3.0 | 11.0 | 1261.861 | 11.0 | 114.714636 | 1.038343 |
| 1 | 337979 | -0.322662 | -0.882213 | -0.112598 | -0.131984 | 0.000150 | 0.000149 | 0.166667 | 0.106061 | 13.0 | 13.0 | 2.0 | 2126.471 | 163.574692 | 3.0 | 13.0 | 2126.471 | 13.0 | 163.574692 | 1.030988 |
| 2 | 337983 | -0.009132 | 0.948856 | -0.074082 | 0.194142 | 0.000285 | 0.000298 | 0.230769 | 0.115385 | 14.0 | 14.0 | 2.0 | 1870.996 | 133.642571 | 3.0 | 14.0 | 1870.996 | 14.0 | 133.642571 | 1.048630 |
| 3 | 337985 | -0.136350 | 0.965531 | -0.095356 | 0.193887 | 0.015656 | 0.000000 | 0.274725 | 0.000000 | 14.0 | 14.0 | 2.0 | 1815.929 | 129.709214 | 3.0 | 14.0 | 1815.929 | 14.0 | 129.709214 | 1.050192 |
| 4 | 337986 | -0.203456 | 0.447374 | -0.074082 | 0.194142 | 0.000249 | 0.000373 | 0.198381 | 0.096154 | 14.0 | 14.0 | 2.0 | 1870.996 | 133.642571 | 3.0 | 14.0 | 1870.996 | 14.0 | 133.642571 | 1.048630 |
# Kendall rank correlation between every pair of columns (node id excluded)
kendall_corr_matrix = leicester_emb_stats_for_corr.drop(
    columns=["osmnx_node_id"]
).corr(method="kendall")
print(kendall_corr_matrix)
EMB000 EMB001 EMB000pooled EMB001pooled
EMB000 1.000000 -0.050229 0.414767 -0.117148 \
EMB001 -0.050229 1.000000 -0.104923 0.393005
EMB000pooled 0.414767 -0.104923 1.000000 -0.170566
EMB001pooled -0.117148 0.393005 -0.170566 1.000000
closeness_networkwide 0.262182 -0.194019 0.364707 -0.337499
betweenness_networkwide 0.242117 -0.025702 0.117417 -0.155388
closeness_egograph 0.295632 0.135150 0.335473 0.202459
betweenness_egograph 0.260210 0.097279 0.125096 -0.017320
n -0.033401 -0.103946 -0.138311 -0.226253
m 0.013223 -0.100975 -0.068330 -0.212787
k_avg 0.260863 0.005101 0.376507 0.036726
edge_length_total 0.210405 -0.130952 0.208221 -0.245809
edge_length_avg 0.370231 -0.044676 0.579544 -0.021938
streets_per_node_avg 0.280222 -0.231995 0.430593 -0.420593
intersection_count 0.047278 -0.143725 -0.019241 -0.302492
street_length_total 0.191975 -0.163191 0.190095 -0.315328
street_segment_count 0.008809 -0.133773 -0.069979 -0.284915
street_length_avg 0.365477 -0.044273 0.588756 -0.014573
circuity_avg -0.028421 0.131059 -0.065721 0.225035
closeness_networkwide betweenness_networkwide
EMB000 0.262182 0.242117 \
EMB001 -0.194019 -0.025702
EMB000pooled 0.364707 0.117417
EMB001pooled -0.337499 -0.155388
closeness_networkwide 1.000000 0.245741
betweenness_networkwide 0.245741 1.000000
closeness_egograph 0.060927 0.037740
betweenness_egograph 0.146073 0.666984
n 0.047026 0.268784
m 0.058577 0.266006
k_avg 0.090216 0.047317
edge_length_total 0.243250 0.369374
edge_length_avg 0.308200 0.140604
streets_per_node_avg 0.448797 0.226158
intersection_count 0.157044 0.319659
street_length_total 0.284100 0.390023
street_segment_count 0.107291 0.295211
street_length_avg 0.301521 0.126597
circuity_avg -0.116848 -0.028826
closeness_egograph betweenness_egograph n
EMB000 0.295632 0.260210 -0.033401 \
EMB001 0.135150 0.097279 -0.103946
EMB000pooled 0.335473 0.125096 -0.138311
EMB001pooled 0.202459 -0.017320 -0.226253
closeness_networkwide 0.060927 0.146073 0.047026
betweenness_networkwide 0.037740 0.666984 0.268784
closeness_egograph 1.000000 0.205350 -0.452824
betweenness_egograph 0.205350 1.000000 0.145905
n -0.452824 0.145905 1.000000
m -0.367824 0.157868 0.891733
k_avg 0.323223 0.100583 0.013712
edge_length_total -0.146388 0.270640 0.589867
edge_length_avg 0.443310 0.178665 -0.296360
streets_per_node_avg -0.037485 0.092405 0.097850
intersection_count -0.420096 0.171545 0.826980
street_length_total -0.219952 0.256335 0.617244
street_segment_count -0.440343 0.155661 0.902224
street_length_avg 0.460244 0.169671 -0.292572
circuity_avg 0.082450 0.038269 -0.118638
m k_avg edge_length_total
EMB000 0.013223 0.260863 0.210405 \
EMB001 -0.100975 0.005101 -0.130952
EMB000pooled -0.068330 0.376507 0.208221
EMB001pooled -0.212787 0.036726 -0.245809
closeness_networkwide 0.058577 0.090216 0.243250
betweenness_networkwide 0.266006 0.047317 0.369374
closeness_egograph -0.367824 0.323223 -0.146388
betweenness_egograph 0.157868 0.100583 0.270640
n 0.891733 0.013712 0.589867
m 1.000000 0.132379 0.665304
k_avg 0.132379 1.000000 0.305397
edge_length_total 0.665304 0.305397 1.000000
edge_length_avg -0.234006 0.292995 0.104432
streets_per_node_avg 0.117131 0.151434 0.321869
intersection_count 0.810128 0.057609 0.674507
street_length_total 0.656864 0.211502 0.879629
street_segment_count 0.878092 0.040801 0.635415
street_length_avg -0.224905 0.323991 0.108293
circuity_avg -0.118920 -0.000375 -0.089911
edge_length_avg streets_per_node_avg
EMB000 0.370231 0.280222 \
EMB001 -0.044676 -0.231995
EMB000pooled 0.579544 0.430593
EMB001pooled -0.021938 -0.420593
closeness_networkwide 0.308200 0.448797
betweenness_networkwide 0.140604 0.226158
closeness_egograph 0.443310 -0.037485
betweenness_egograph 0.178665 0.092405
n -0.296360 0.097850
m -0.234006 0.117131
k_avg 0.292995 0.151434
edge_length_total 0.104432 0.321869
edge_length_avg 1.000000 0.340156
streets_per_node_avg 0.340156 1.000000
intersection_count -0.166574 0.279544
street_length_total 0.080524 0.389084
street_segment_count -0.232123 0.197164
street_length_avg 0.904994 0.316873
circuity_avg 0.048389 -0.153925
intersection_count street_length_total
EMB000 0.047278 0.191975 \
EMB001 -0.143725 -0.163191
EMB000pooled -0.019241 0.190095
EMB001pooled -0.302492 -0.315328
closeness_networkwide 0.157044 0.284100
betweenness_networkwide 0.319659 0.390023
closeness_egograph -0.420096 -0.219952
betweenness_egograph 0.171545 0.256335
n 0.826980 0.617244
m 0.810128 0.656864
k_avg 0.057609 0.211502
edge_length_total 0.674507 0.879629
edge_length_avg -0.166574 0.080524
streets_per_node_avg 0.279544 0.389084
intersection_count 1.000000 0.739393
street_length_total 0.739393 1.000000
street_segment_count 0.910502 0.687454
street_length_avg -0.170992 0.084041
circuity_avg -0.143807 -0.106058
street_segment_count street_length_avg circuity_avg
EMB000 0.008809 0.365477 -0.028421
EMB001 -0.133773 -0.044273 0.131059
EMB000pooled -0.069979 0.588756 -0.065721
EMB001pooled -0.284915 -0.014573 0.225035
closeness_networkwide 0.107291 0.301521 -0.116848
betweenness_networkwide 0.295211 0.126597 -0.028826
closeness_egograph -0.440343 0.460244 0.082450
betweenness_egograph 0.155661 0.169671 0.038269
n 0.902224 -0.292572 -0.118638
m 0.878092 -0.224905 -0.118920
k_avg 0.040801 0.323991 -0.000375
edge_length_total 0.635415 0.108293 -0.089911
edge_length_avg -0.232123 0.904994 0.048389
streets_per_node_avg 0.197164 0.316873 -0.153925
intersection_count 0.910502 -0.170992 -0.143807
street_length_total 0.687454 0.084041 -0.106058
street_segment_count 1.000000 -0.233779 -0.132164
street_length_avg -0.233779 1.000000 0.043635
circuity_avg -0.132164 0.043635 1.000000
# Double-checking difference with Spearman's rho (node id excluded)
spearman_corr_matrix = leicester_emb_stats_for_corr.drop(
    columns=["osmnx_node_id"]
).corr(method="spearman")
print(spearman_corr_matrix)
EMB000 EMB001 EMB000pooled EMB001pooled
EMB000 1.000000 -0.077134 0.598860 -0.184985 \
EMB001 -0.077134 1.000000 -0.161475 0.569221
EMB000pooled 0.598860 -0.161475 1.000000 -0.270270
EMB001pooled -0.184985 0.569221 -0.270270 1.000000
closeness_networkwide 0.388640 -0.292064 0.537505 -0.505571
betweenness_networkwide 0.354346 -0.053826 0.170578 -0.225798
closeness_egograph 0.437037 0.202000 0.490640 0.291693
betweenness_egograph 0.380707 0.151481 0.181311 -0.027183
n -0.049975 -0.157148 -0.204113 -0.329446
m 0.021164 -0.152845 -0.101058 -0.310380
k_avg 0.388864 0.003666 0.545514 0.045068
edge_length_total 0.320914 -0.202643 0.310028 -0.358725
edge_length_avg 0.534784 -0.066179 0.775982 -0.038231
streets_per_node_avg 0.414339 -0.348581 0.610777 -0.605623
intersection_count 0.072714 -0.218416 -0.026976 -0.434794
street_length_total 0.294263 -0.253735 0.286724 -0.456843
street_segment_count 0.014611 -0.203271 -0.103784 -0.410763
street_length_avg 0.528989 -0.065379 0.785537 -0.026850
circuity_avg -0.043169 0.195740 -0.100317 0.327021
closeness_networkwide betweenness_networkwide
EMB000 0.388640 0.354346 \
EMB001 -0.292064 -0.053826
EMB000pooled 0.537505 0.170578
EMB001pooled -0.505571 -0.225798
closeness_networkwide 1.000000 0.350715
betweenness_networkwide 0.350715 1.000000
closeness_egograph 0.087531 0.051907
betweenness_egograph 0.212391 0.835012
n 0.069174 0.382218
m 0.087432 0.380044
k_avg 0.129970 0.066617
edge_length_total 0.360226 0.529066
edge_length_avg 0.456250 0.204010
streets_per_node_avg 0.641996 0.326195
intersection_count 0.233107 0.453933
street_length_total 0.418098 0.556922
street_segment_count 0.159911 0.419911
street_length_avg 0.448151 0.184167
circuity_avg -0.176367 -0.041270
closeness_egograph betweenness_egograph n
EMB000 0.437037 0.380707 -0.049975 \
EMB001 0.202000 0.151481 -0.157148
EMB000pooled 0.490640 0.181311 -0.204113
EMB001pooled 0.291693 -0.027183 -0.329446
closeness_networkwide 0.087531 0.212391 0.069174
betweenness_networkwide 0.051907 0.835012 0.382218
closeness_egograph 1.000000 0.297919 -0.629959
betweenness_egograph 0.297919 1.000000 0.213167
n -0.629959 0.213167 1.000000
m -0.523668 0.230822 0.980338
k_avg 0.458938 0.152805 0.015981
edge_length_total -0.216647 0.397477 0.784142
edge_length_avg 0.620514 0.255275 -0.425638
streets_per_node_avg -0.060167 0.143119 0.144189
intersection_count -0.590339 0.251619 0.953861
street_length_total -0.322573 0.381201 0.806319
street_segment_count -0.616406 0.228824 0.983940
street_length_avg 0.640899 0.242273 -0.420878
circuity_avg 0.123876 0.054817 -0.173852
m k_avg edge_length_total
EMB000 0.021164 0.388864 0.320914 \
EMB001 -0.152845 0.003666 -0.202643
EMB000pooled -0.101058 0.545514 0.310028
EMB001pooled -0.310380 0.045068 -0.358725
closeness_networkwide 0.087432 0.129970 0.360226
betweenness_networkwide 0.380044 0.066617 0.529066
closeness_egograph -0.523668 0.458938 -0.216647
betweenness_egograph 0.230822 0.152805 0.397477
n 0.980338 0.015981 0.784142
m 1.000000 0.188244 0.851935
k_avg 0.188244 1.000000 0.432809
edge_length_total 0.851935 0.432809 1.000000
edge_length_avg -0.341298 0.432676 0.160137
streets_per_node_avg 0.172351 0.206537 0.464561
intersection_count 0.944876 0.071353 0.857755
street_length_total 0.842110 0.292745 0.972294
street_segment_count 0.971088 0.050415 0.827154
street_length_avg -0.328617 0.474856 0.165242
circuity_avg -0.173620 -0.001432 -0.132332
edge_length_avg streets_per_node_avg
EMB000 0.534784 0.414339 \
EMB001 -0.066179 -0.348581
EMB000pooled 0.775982 0.610777
EMB001pooled -0.038231 -0.605623
closeness_networkwide 0.456250 0.641996
betweenness_networkwide 0.204010 0.326195
closeness_egograph 0.620514 -0.060167
betweenness_egograph 0.255275 0.143119
n -0.425638 0.144189
m -0.341298 0.172351
k_avg 0.432676 0.206537
edge_length_total 0.160137 0.464561
edge_length_avg 1.000000 0.499462
streets_per_node_avg 0.499462 1.000000
intersection_count -0.240841 0.406843
street_length_total 0.128629 0.558292
street_segment_count -0.338067 0.291215
street_length_avg 0.983105 0.469362
circuity_avg 0.067899 -0.230816
intersection_count street_length_total
EMB000 0.072714 0.294263 \
EMB001 -0.218416 -0.253735
EMB000pooled -0.026976 0.286724
EMB001pooled -0.434794 -0.456843
closeness_networkwide 0.233107 0.418098
betweenness_networkwide 0.453933 0.556922
closeness_egograph -0.590339 -0.322573
betweenness_egograph 0.251619 0.381201
n 0.953861 0.806319
m 0.944876 0.842110
k_avg 0.071353 0.292745
edge_length_total 0.857755 0.972294
edge_length_avg -0.240841 0.128629
streets_per_node_avg 0.406843 0.558292
intersection_count 1.000000 0.904625
street_length_total 0.904625 1.000000
street_segment_count 0.985890 0.867512
street_length_avg -0.248148 0.133033
circuity_avg -0.209816 -0.156784
street_segment_count street_length_avg circuity_avg
EMB000 0.014611 0.528989 -0.043169
EMB001 -0.203271 -0.065379 0.195740
EMB000pooled -0.103784 0.785537 -0.100317
EMB001pooled -0.410763 -0.026850 0.327021
closeness_networkwide 0.159911 0.448151 -0.176367
betweenness_networkwide 0.419911 0.184167 -0.041270
closeness_egograph -0.616406 0.640899 0.123876
betweenness_egograph 0.228824 0.242273 0.054817
n 0.983940 -0.420878 -0.173852
m 0.971088 -0.328617 -0.173620
k_avg 0.050415 0.474856 -0.001432
edge_length_total 0.827154 0.165242 -0.132332
edge_length_avg -0.338067 0.983105 0.067899
streets_per_node_avg 0.291215 0.469362 -0.230816
intersection_count 0.985890 -0.248148 -0.209816
street_length_total 0.867512 0.133033 -0.156784
street_segment_count 1.000000 -0.340965 -0.192780
street_length_avg -0.340965 1.000000 0.060435
circuity_avg -0.192780 0.060435 1.000000
# Pairwise 2-D histograms of all embedding and statistics columns (node id excluded)
pairplot_columns_df = leicester_emb_stats_for_corr.drop(columns=["osmnx_node_id"])
sns.pairplot(pairplot_columns_df, kind="hist")
<seaborn.axisgrid.PairGrid at 0x1a3613d90>
# Save the joined embeddings-and-statistics table, without the row index
emb_stats_output_path = (
    this_repo_directory
    + "/data/leicester-1864_emb_gnnuf_model_v0-5_incl-pool-with-stats.csv"
)
leicester_emb_stats_for_corr.to_csv(emb_stats_output_path, index=False)
Check specific nodes based on their embeddings and ego-graph structure
# Embedding row for node 6782625866
leicester_emb_df[leicester_emb_df["osmnx_node_id"].eq(6782625866)]
| osmnx_node_id | EMB000 | EMB001 | |
|---|---|---|---|
| 12711 | 6782625866 | -0.999753 | -0.943405 |
# Ego-graph of node 6782625866: nodes within `max_distance` of it, measured
# along the edge "length" attribute and ignoring edge direction
ego_6782625866 = nx.generators.ego_graph(
    leicester_osmnx_graph,
    6782625866,
    radius=max_distance,
    undirected=True,
    distance="length",
)
ego_6782625866_prj = ox.project_graph(ego_6782625866)

# Highlight the centre node (larger, red); every other node is small and white
is_centre_node = [node == 6782625866 for node in ego_6782625866_prj.nodes]
ox.plot_graph(
    ego_6782625866_prj,
    node_size=[20 if centre else 5 for centre in is_centre_node],
    node_color=["#e41a1c" if centre else "#ffffff" for centre in is_centre_node],
    figsize=(5, 5),
)
(<Figure size 500x500 with 1 Axes>, <Axes: >)
# Embedding row for node 354554417
leicester_emb_df[leicester_emb_df["osmnx_node_id"].eq(354554417)]
| osmnx_node_id | EMB000 | EMB001 | |
|---|---|---|---|
| 4733 | 354554417 | -0.966505 | 0.982919 |
# Ego-graph of node 354554417: nodes within `max_distance` of it, measured
# along the edge "length" attribute and ignoring edge direction
ego_354554417 = nx.generators.ego_graph(
    leicester_osmnx_graph,
    354554417,
    radius=max_distance,
    undirected=True,
    distance="length",
)
ego_354554417_prj = ox.project_graph(ego_354554417)

# Highlight the centre node (larger, red); every other node is small and white
is_centre_node = [node == 354554417 for node in ego_354554417_prj.nodes]
ox.plot_graph(
    ego_354554417_prj,
    node_size=[20 if centre else 5 for centre in is_centre_node],
    node_color=["#e41a1c" if centre else "#ffffff" for centre in is_centre_node],
    figsize=(5, 5),
)
(<Figure size 500x500 with 1 Axes>, <Axes: >)
# Embedding row for node 1179199412
leicester_emb_df[leicester_emb_df["osmnx_node_id"].eq(1179199412)]
| osmnx_node_id | EMB000 | EMB001 | |
|---|---|---|---|
| 8021 | 1179199412 | 0.999113 | 0.99909 |
# Ego-graph of node 1179199412: nodes within `max_distance` of it, measured
# along the edge "length" attribute and ignoring edge direction
ego_1179199412 = nx.generators.ego_graph(
    leicester_osmnx_graph,
    1179199412,
    radius=max_distance,
    undirected=True,
    distance="length",
)
ego_1179199412_prj = ox.project_graph(ego_1179199412)

# Highlight the centre node (larger, red); every other node is small and white
is_centre_node = [node == 1179199412 for node in ego_1179199412_prj.nodes]
ox.plot_graph(
    ego_1179199412_prj,
    node_size=[20 if centre else 5 for centre in is_centre_node],
    node_color=["#e41a1c" if centre else "#ffffff" for centre in is_centre_node],
    figsize=(5, 5),
)
(<Figure size 500x500 with 1 Axes>, <Axes: >)
# Embedding row for node 2858142815
leicester_emb_df[leicester_emb_df["osmnx_node_id"].eq(2858142815)]
| osmnx_node_id | EMB000 | EMB001 | |
|---|---|---|---|
| 11121 | 2858142815 | 0.002341 | 0.165836 |
# Ego-graph of node 2858142815: nodes within `max_distance` of it, measured
# along the edge "length" attribute and ignoring edge direction
ego_2858142815 = nx.generators.ego_graph(
    leicester_osmnx_graph,
    2858142815,
    radius=max_distance,
    undirected=True,
    distance="length",
)
ego_2858142815_prj = ox.project_graph(ego_2858142815)

# Highlight the centre node (larger, red); every other node is small and white
is_centre_node = [node == 2858142815 for node in ego_2858142815_prj.nodes]
ox.plot_graph(
    ego_2858142815_prj,
    node_size=[20 if centre else 5 for centre in is_centre_node],
    node_color=["#e41a1c" if centre else "#ffffff" for centre in is_centre_node],
    figsize=(5, 5),
)
(<Figure size 500x500 with 1 Axes>, <Axes: >)
# Embedding row for node 296162322
leicester_emb_df[leicester_emb_df["osmnx_node_id"].eq(296162322)]
| osmnx_node_id | EMB000 | EMB001 | |
|---|---|---|---|
| 3845 | 296162322 | 0.99999 | -0.999946 |
# Ego-graph of node 296162322: nodes within `max_distance` of it, measured
# along the edge "length" attribute and ignoring edge direction
ego_296162322 = nx.generators.ego_graph(
    leicester_osmnx_graph,
    296162322,
    radius=max_distance,
    undirected=True,
    distance="length",
)
ego_296162322_prj = ox.project_graph(ego_296162322)

# Highlight the centre node (larger, red); every other node is small and white
is_centre_node = [node == 296162322 for node in ego_296162322_prj.nodes]
ox.plot_graph(
    ego_296162322_prj,
    node_size=[20 if centre else 5 for centre in is_centre_node],
    node_color=["#e41a1c" if centre else "#ffffff" for centre in is_centre_node],
    figsize=(5, 5),
)
(<Figure size 500x500 with 1 Axes>, <Axes: >)
# Cluster the nodes in the two-dimensional embedding space with DBSCAN and
# store the cluster label of each node in a working copy of the embeddings.
from sklearn.cluster import DBSCAN

leicester_emb_patters_df = leicester_emb_df.copy()
# Only rows with both embedding values can be clustered
leicester_emb_df_clust = leicester_emb_df[["EMB000", "EMB001"]].dropna()
clust = DBSCAN(eps=0.11, min_samples=300)
# The labels are aligned on the index of the rows that were actually
# clustered. Assigning the bare array would fail with a length mismatch as
# soon as dropna() removes a row; with the aligned Series such rows get a
# missing label instead.
# NOTE(review): a missing label makes the column float-typed, which the
# downstream colour lookup would need to handle.
leicester_emb_patters_df["clust"] = pd.Series(
    clust.fit_predict(leicester_emb_df_clust),
    index=leicester_emb_df_clust.index,
)
# Number of distinct labels
leicester_emb_patters_df["clust"].nunique()
8
# Nine-colour palette used for the cluster labels
colorbrewer_set1 = [
    "#e41a1c", "#377eb8", "#4daf4a",
    "#984ea3", "#ff7f00", "#ffff33",
    "#a65628", "#f781bf", "#999999",
]
# The label is used directly as a list index, so the label -1 picks the last
# entry ("#999999") through Python's negative indexing
leicester_emb_patters_df["clust_colour"] = [
    colorbrewer_set1[cluster_label]
    for cluster_label in leicester_emb_patters_df["clust"]
]
leicester_emb_patters_df.head()
| osmnx_node_id | EMB000 | EMB001 | clust | clust_colour | |
|---|---|---|---|---|---|
| 0 | 337976 | -0.212304 | -0.563564 | -1 | #999999 |
| 1 | 337979 | -0.322662 | -0.882213 | -1 | #999999 |
| 2 | 337983 | -0.009132 | 0.948856 | -1 | #999999 |
| 3 | 337985 | -0.136350 | 0.965531 | -1 | #999999 |
| 4 | 337986 | -0.203456 | 0.447374 | -1 | #999999 |
# Static scatter plot of the embeddings, coloured by cluster
plt.figure(figsize=(7, 7))
ax = plt.axes()
ax.set_facecolor("white")
plt.scatter(
    x=leicester_emb_patters_df["EMB000"],
    y=leicester_emb_patters_df["EMB001"],
    c=leicester_emb_patters_df["clust_colour"],
    s=5,
    linewidth=0.1,
    edgecolors='black',
)
plt.xlabel("Embeddings first dimension")
plt.ylabel("Embeddings second dimension")
plt.show()
# Interactive scatter plot of the embeddings, coloured by cluster
cluster_markers = go.Scatter(
    x=leicester_emb_patters_df["EMB000"],
    y=leicester_emb_patters_df["EMB001"],
    mode='markers',
    marker=dict(color=leicester_emb_patters_df["clust_colour"]),
)
fig = go.Figure(data=[cluster_markers])
fig.update_layout({"plot_bgcolor": "#ffffff"}, width=800, height=800)
# Both axes share the same light-grey grid and zero line
cluster_axis_style = dict(
    showgrid=True, gridwidth=1, gridcolor='#cccccc',
    zeroline=True, zerolinewidth=1, zerolinecolor='#cccccc',
)
fig.update_xaxes(**cluster_axis_style)
fig.update_yaxes(**cluster_axis_style)
fig.show()
def bivariate_colour(x, limits):
    """Return the colour of a 3x3 bivariate scheme for a pair of values.

    Each value is placed in one of three classes by comparing it with two
    break points: class 0 if ``value <= lower``, class 1 if
    ``value <= upper``, class 2 otherwise. The two classes select a cell of
    the palette.

    Args:
        x: Two-element sequence ``[first, second]`` with the values to classify.
        limits: Array indexed as ``limits[i, j]``, where row ``i`` holds the
            lower (``j == 0``) and upper (``j == 1``) break points for
            ``x[i]``.

    Returns:
        A hex colour string, or ``None`` when either value is missing
        (``None`` or NaN).
    """
    first, second = x[0], x[1]
    # NaN must be treated as missing as well: it fails every "<=" comparison
    # and would otherwise be coloured as the highest class on both dimensions.
    if pd.isna(first) or pd.isna(second):
        return None

    # palette[class of x[0]][class of x[1]]
    # Alternative palette kept from the original code, in the same layout:
    #   ("#e8e8e8", "#cbb8d7", "#9972af"),
    #   ("#e4d9ac", "#c8ada0", "#976b82"),
    #   ("#c8b35a", "#af8e53", "#804d36"),
    palette = (
        ("#e8e8e8", "#e4acac", "#c85a5a"),
        ("#b0d5df", "#ad9ea5", "#985356"),
        ("#64acbe", "#627f8c", "#574249"),
    )

    def tercile_class(value, lower, upper):
        # Break points are inclusive on the lower class
        if value <= lower:
            return 0
        if value <= upper:
            return 1
        return 2

    first_class = tercile_class(first, limits[0, 0], limits[0, 1])
    second_class = tercile_class(second, limits[1, 0], limits[1, 1])
    return palette[first_class][second_class]
# Break points at the 1/3 and 2/3 quantiles of each embedding dimension.
# After the transpose, row i holds the two break points of dimension i.
embedding_terciles_df = leicester_emb_df[["EMB000", "EMB001"]].quantile([1/3, 2/3])
leicester_emb_quantiles = embedding_terciles_df.values.transpose()
# Classify every node into one cell of the bivariate colour scheme
leicester_emb_patters_df["bivariate_colour"] = leicester_emb_patters_df.apply(
    lambda row: bivariate_colour(
        [row["EMB000"], row["EMB001"]],
        leicester_emb_quantiles,
    ),
    axis=1,
)
leicester_emb_patters_df.head()
| osmnx_node_id | EMB000 | EMB001 | clust | clust_colour | bivariate_colour | |
|---|---|---|---|---|---|---|
| 0 | 337976 | -0.212304 | -0.563564 | -1 | #999999 | #e8e8e8 |
| 1 | 337979 | -0.322662 | -0.882213 | -1 | #999999 | #e8e8e8 |
| 2 | 337983 | -0.009132 | 0.948856 | -1 | #999999 | #985356 |
| 3 | 337985 | -0.136350 | 0.965531 | -1 | #999999 | #c85a5a |
| 4 | 337986 | -0.203456 | 0.447374 | -1 | #999999 | #e4acac |
# Static scatter plot of the embeddings, coloured by bivariate class
plt.figure(figsize=(7, 7))
ax = plt.axes()
ax.set_facecolor("white")
plt.scatter(
    x=leicester_emb_patters_df["EMB000"],
    y=leicester_emb_patters_df["EMB001"],
    c=leicester_emb_patters_df["bivariate_colour"],
    s=10,
    linewidth=0.1,
    edgecolors='black',
)
plt.xlabel("Embeddings first dimension")
plt.ylabel("Embeddings second dimension")
plt.show()
# Interactive scatter plot of the embeddings, coloured by bivariate class
bivariate_markers = go.Scatter(
    x=leicester_emb_patters_df["EMB000"],
    y=leicester_emb_patters_df["EMB001"],
    mode='markers',
    marker=dict(color=leicester_emb_patters_df["bivariate_colour"]),
)
fig = go.Figure(data=[bivariate_markers])
fig.update_layout({"plot_bgcolor": "#ffffff"}, width=800, height=800)
# Both axes share the same light-grey grid and zero line
bivariate_axis_style = dict(
    showgrid=True, gridwidth=1, gridcolor='#cccccc',
    zeroline=True, zerolinewidth=1, zerolinecolor='#cccccc',
)
fig.update_xaxes(**bivariate_axis_style)
fig.update_yaxes(**bivariate_axis_style)
fig.show()
# Copy the projected graph and store, on every node, the colours and marker
# size used by the maps below. Nodes with a row in the embeddings table get
# their cluster and bivariate colours and size 7; nodes without one are drawn
# black with size 1.
leicester_osmnx_bivariate = leicester_osmnx_graph_prj.copy()

# Index the colours by node id once, so each node costs a dictionary lookup
# instead of a scan of the whole table. When a node id is repeated, the first
# row wins, matching the previous `.values[0]` behaviour.
colours_by_node = (
    leicester_emb_patters_df
    .drop_duplicates(subset="osmnx_node_id", keep="first")
    .set_index("osmnx_node_id")[["bivariate_colour", "clust_colour"]]
    .to_dict(orient="index")
)
for node, node_attrs in leicester_osmnx_bivariate.nodes(data=True):
    node_colours = colours_by_node.get(node)
    if node_colours is None:
        node_attrs["bivariate_colour"] = "#000000"
        node_attrs["clust_colour"] = "#000000"
        node_attrs["node_size"] = 1
    else:
        node_attrs["bivariate_colour"] = node_colours["bivariate_colour"]
        node_attrs["clust_colour"] = node_colours["clust_colour"]
        node_attrs["node_size"] = 7
# Map of the network coloured by cluster. Nodes carrying the last palette
# colour are shrunk to size 1; all others keep their stored size.
last_palette_colour = colorbrewer_set1[-1]
cluster_map_colours = [
    node_attrs["clust_colour"]
    for _, node_attrs in leicester_osmnx_bivariate.nodes(data=True)
]
cluster_map_sizes = [
    1 if node_attrs["clust_colour"] == last_palette_colour else node_attrs["node_size"]
    for _, node_attrs in leicester_osmnx_bivariate.nodes(data=True)
]
ox.plot_graph(
    leicester_osmnx_bivariate,
    node_color=cluster_map_colours,
    node_size=cluster_map_sizes,
    bgcolor="#ffffff",
    edge_color="#000000",
    edge_linewidth=0.1,
    figsize=(12, 12),
)
(<Figure size 1200x1200 with 1 Axes>, <Axes: >)
# ox.plot_graph(
# leicester_osmnx_bivariate,
# node_color=[leicester_osmnx_bivariate.nodes[node]["clust_colour"] for node in leicester_osmnx_bivariate.nodes],
# node_size=[leicester_osmnx_bivariate.nodes[node]["node_size"]*8 if leicester_osmnx_bivariate.nodes[node]["clust_colour"]!=colorbrewer_set1[-1] else 8 for node in leicester_osmnx_bivariate.nodes],
# bgcolor="#ffffff", edge_color="#000000", edge_linewidth=0.1,
# figsize=(48, 48))
# Map of the network coloured by bivariate class, using the stored node sizes
bivariate_map_colours = [
    node_attrs["bivariate_colour"]
    for _, node_attrs in leicester_osmnx_bivariate.nodes(data=True)
]
bivariate_map_sizes = [
    node_attrs["node_size"]
    for _, node_attrs in leicester_osmnx_bivariate.nodes(data=True)
]
ox.plot_graph(
    leicester_osmnx_bivariate,
    node_color=bivariate_map_colours,
    node_size=bivariate_map_sizes,
    bgcolor="#ffffff",
    edge_color="#000000",
    edge_linewidth=0.1,
    figsize=(12, 12),
)
(<Figure size 1200x1200 with 1 Axes>, <Axes: >)
# ox.plot_graph(
# leicester_osmnx_bivariate,
# node_color=[leicester_osmnx_bivariate.nodes[node]["bivariate_colour"] for node in leicester_osmnx_bivariate.nodes],
# node_size=[leicester_osmnx_bivariate.nodes[node]["node_size"]*2 for node in leicester_osmnx_bivariate.nodes],
# bgcolor="#ffffff", edge_color="#000000", edge_linewidth=0.1,
# figsize=(24, 24))
import geopandas as gpd

# Point geometries built from the lon/lat columns, declared as EPSG:4326
node_points = gpd.points_from_xy(
    leicester_osmnx_graph_prj_df.lon,
    leicester_osmnx_graph_prj_df.lat,
)
leicester_nodes_gdf = gpd.GeoDataFrame(
    leicester_osmnx_graph_prj_df,
    geometry=node_points,
    crs="EPSG:4326",
)
# Left join: every graph node is kept, and nodes without an embedding get
# missing values in the embedding and colour columns
leicester_gdf = leicester_nodes_gdf.merge(
    leicester_emb_patters_df, on='osmnx_node_id', how='left'
)
leicester_gdf.head()
| y | x | street_count | elevation | elevation_aster | elevation_srtm | lon | lat | osmnx_node_id | ref | highway | geometry | EMB000 | EMB001 | clust | clust_colour | bivariate_colour | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5.829804e+06 | 622151.977595 | 3 | 72.0 | 35 | 72 | -1.196195 | 52.604506 | 194739 | NaN | NaN | POINT (-1.19620 52.60451) | NaN | NaN | NaN | NaN | NaN |
| 1 | 5.829991e+06 | 622098.041002 | 3 | 72.0 | 45 | 72 | -1.196922 | 52.606196 | 1551014281 | NaN | NaN | POINT (-1.19692 52.60620) | NaN | NaN | NaN | NaN | NaN |
| 2 | 5.828827e+06 | 622259.813792 | 2 | 79.0 | 57 | 79 | -1.194965 | 52.595696 | 326312 | 21 | motorway_junction | POINT (-1.19497 52.59570) | NaN | NaN | NaN | NaN | NaN |
| 3 | 5.830107e+06 | 622077.742140 | 3 | 79.0 | 43 | 79 | -1.197179 | 52.607245 | 326320 | 21 | motorway_junction | POINT (-1.19718 52.60724) | NaN | NaN | NaN | NaN | NaN |
| 4 | 5.829673e+06 | 622220.645785 | 3 | 74.0 | 35 | 74 | -1.195230 | 52.603314 | 2627867454 | NaN | NaN | POINT (-1.19523 52.60331) | 0.435371 | -0.637928 | -1.0 | #999999 | #b0d5df |
# Interactive map of the nodes that have an embedding and whose cluster colour
# is not the last palette entry, drawn in their cluster colour.
# NOTE(review): recent folium releases no longer bundle the Stamen tiles -
# confirm "Stamen Toner" still resolves in the target environment.
not_last_palette_colour = leicester_gdf["clust_colour"] != colorbrewer_set1[-1]
clustered_nodes_gdf = leicester_gdf[not_last_palette_colour].dropna(subset=["EMB000"])
clustered_nodes_gdf.explore(
    color="clust_colour",
    marker_kwds={"radius": 7},
    style_kwds={"stroke": False},
    tiles="Stamen Toner",
)
# Interactive map of the nodes that have an embedding, drawn in their
# bivariate colour.
# NOTE(review): in the visible code "#000000" is only written to graph node
# attributes, not to this table, so the first filter looks like a no-op;
# nodes without an embedding are removed by the dropna() - confirm.
# NOTE(review): recent folium releases no longer bundle the Stamen tiles -
# confirm "Stamen Toner" still resolves in the target environment.
not_black = leicester_gdf["bivariate_colour"] != "#000000"
bivariate_nodes_gdf = leicester_gdf[not_black].dropna(subset=["EMB000"])
bivariate_nodes_gdf.explore(
    color="bivariate_colour",
    marker_kwds={"radius": 7},
    style_kwds={"stroke": False},
    legend=True,
    tiles="Stamen Toner",
)